This Rmarkdown document contains the code to generate the community-level analyses of the EHI DNA extraction method test.

Index

Load libraries

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(ape))
suppressPackageStartupMessages(library(distillR))
suppressPackageStartupMessages(library(phyloseq))
suppressPackageStartupMessages(library(hilldiv2))
suppressPackageStartupMessages(library(ggh4x))
suppressPackageStartupMessages(library(tinytable))
suppressPackageStartupMessages(library(vegan))

Amphibians

Calotriton asper

Retrieve data

species="Calotriton asper"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)

read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0121/DMB0121.tree.gz", "data/DMB0121.tree.gz")
genome_tree <- read_tree("data/DMB0121.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_gpku6szt1ljkp9946c58
Extraction richness neutral phylogenetic
E1 107.0 51.75880 6.406283
E2 97.5 48.75158 6.219160
E3 106.5 51.56757 6.311931

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_fzbb4uee7rwpphhhc9yc
relationship richness neutral phylogenetic
inter 0.29497046 0.381599435 0.105560612
intra 0.03215577 0.007477988 0.001740429

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Lissotriton helveticus

Retrieve data

species="Lissotriton helveticus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0124/DMB0124.tree.gz", "data/DMB0124.tree.gz")
genome_tree <- read_tree("data/DMB0124.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_ryo1b74b0qh1vnpxvmla
Extraction richness neutral phylogenetic
E1 22 12.05083 4.439034
E2 21 10.73360 3.336904
E3 22 12.15999 4.408855

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_fhz3plf0xum0furgq3my
relationship richness neutral phylogenetic
inter 0.9090909 0.9487207 0.3681096
intra 0.2288066 0.1798833 0.1289869

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Salamandra atra

Retrieve data

species="Salamandra atra"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0129/DMB0129.tree.gz", "data/DMB0129.tree.gz")
genome_tree <- read_tree("data/DMB0129.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_u9oaid1npt6b2ogx6zuu
Extraction richness neutral phylogenetic
E1 70.0 35.33416 6.024485
E2 58.5 30.15278 5.653610
E3 84.0 38.29817 6.183859

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_zmp3sc9sjijscqq5akq9
relationship richness neutral phylogenetic
inter 0.3854701 0.3862921 0.125862853
intra 0.1422510 0.0315443 0.007370115

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Birds

Geospizopsis unicolor

Retrieve data

species="Geospizopsis unicolor"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0123/DMB0123.tree.gz", "data/DMB0123.tree.gz")
genome_tree <- read_tree("data/DMB0123.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

Alpha diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.

Beta diversity

Beta diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.

Perisoreus infaustus

Retrieve data

species="Perisoreus infaustus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0126/DMB0126.tree.gz", "data/DMB0126.tree.gz")
genome_tree <- read_tree("data/DMB0126.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

Alpha diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.

Beta diversity

Beta diversity is not calculated as only one sample contains enough genome coverage to calculate diversity estimates.

Zonotrichia capensis

The number of reconstructed genomes was too low for community analyses.

Mammals

Plecotus auritus

Retrieve data

species="Plecotus auritus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0127/DMB0127.tree.gz", "data/DMB0127.tree.gz")
genome_tree <- read_tree("data/DMB0127.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_7j39b9jaddfrsbuuzq2a
Extraction richness neutral phylogenetic
E1 12.5 5.070885 2.691939
E2 6.5 4.042292 2.282549
E3 11.5 5.348040 2.739107

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_17fol9qxndrt08n2ywgu
relationship richness neutral phylogenetic
inter 0.6727759 0.82731396 0.68954392
intra 0.2123748 0.03113448 0.01336019

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Sciurus carolinensis

Retrieve data

species="Sciurus carolinensis"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0130/DMB0130.tree.gz", "data/DMB0130.tree.gz")
genome_tree <- read_tree("data/DMB0130.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_3jigi60mstl55zynyxeh
Extraction richness neutral phylogenetic
E1 95.0 39.66320 5.557244
E2 107.5 59.13437 6.138243
E3 96.0 47.74310 5.728024

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_120b3q9czm6ofjsblkl4
relationship richness neutral phylogenetic
inter 0.4447768 0.6554629 0.19033972
intra 0.1823044 0.1211974 0.03562178

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Trichosurus vulpecula

These data are pending to be produced.

Reptiles

Chalcides striatus

Retrieve data

species="Chalcides striatus"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0122/DMB0122.tree.gz", "data/DMB0122.tree.gz")
genome_tree <- read_tree("data/DMB0122.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_03hasej4bk3sbbfrzj3y
Extraction richness neutral phylogenetic
E1 60.5 31.68716 5.314585
E2 74.0 37.04400 5.254353
E3 64.5 34.15145 5.180680

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_dqkgdpi9cctm7nb5gdsr
relationship richness neutral phylogenetic
inter 0.4109306 0.45373698 0.09334837
intra 0.1286148 0.06449944 0.02626687

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Natrix astreptophora

Retrieve data

species="Natrix astreptophora"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0125/DMB0125.tree.gz", "data/DMB0125.tree.gz")
genome_tree <- read_tree("data/DMB0125.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_dgyopdfmxds322mrx0z1
Extraction richness neutral phylogenetic
E1 37.5 16.30449 3.270384
E2 24.0 13.43764 3.065722
E3 29.5 15.34095 3.211162

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_ws1w28eyendsk8bag6li
relationship richness neutral phylogenetic
inter 0.5386629 0.65025916 0.18128644
intra 0.2756370 0.04665216 0.01166948

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))

Podarcis muralis

Retrieve data

species="Podarcis muralis"
genus=strsplit(species, " ")[[1]][1] %>% tolower()

sample_metadata <- read_tsv(paste0("data/metadata.tsv")) %>%
    rename(dataset=Dataset) %>%
    filter(Species == species)


read_counts <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_counts.tsv.gz") %>%
  rename(genome = 1)

genome_metadata <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_mag_info.tsv.gz") %>%
  rename(genome = 1, length=mag_size)

genome_coverage <- read_tsv("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128_coverage.tsv.gz") %>%
  rename(genome = 1)

download.file("https://sid.erda.dk/share_redirect/BaMZodj9sA/DMB/DMB0128/DMB0128.tree.gz", "data/DMB0128.tree.gz")
genome_tree <- read_tree("data/DMB0128.tree.gz")

Filter data

#Filter by coverage
min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]]))

# Transform  to genome counts (non-filtered)
readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

# Transform to genome counts (coverage-filtered)
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

Community barplot

# Retrieve EHI taxonomy colors
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Stacked barplot
genome_counts %>%
    mutate_at(vars(-genome), funs(./sum(., na.rm = TRUE)))  %>% #apply TSS nornalisation
    pivot_longer(-genome, names_to = "dataset", values_to = "count") %>%
    left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append taxonomy
    left_join(., sample_metadata, by = join_by(dataset == dataset)) %>% #append taxonomy
    ggplot(., aes(x=dataset,y=count,fill=phylum, group=phylum))+ #grouping enables keeping the same sorting of taxonomic units
          geom_bar(stat="identity", colour="white", linewidth=0.1)+ #plot stacked bars with white borders
          scale_fill_manual(values=phylum_colors) +
          labs(y = "Relative abundance") +
          facet_nested(. ~ Sample + Extraction, scales="free_x") +
          guides(fill = guide_legend(ncol = 3)) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                          axis.title.x = element_blank(),
                panel.background = element_blank(),
                panel.border = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
                legend.position="none",
                legend.title=element_blank())

Alpha diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=0) %>%
            t() %>%
            as.data.frame() %>%
            rename(richness=1) %>%
            rownames_to_column(var="dataset")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1) %>%
            t() %>%
            as.data.frame() %>%
            rename(neutral=1) %>%
            rownames_to_column(var="dataset")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hilldiv(.,q=1,tree=genome_tree) %>%
            t() %>%
            as.data.frame() %>%
            rename(phylogenetic=1) %>%
            rownames_to_column(var="dataset")

# Merge alpha diversities
alpha_diversity <- richness %>%
      full_join(neutral,by=join_by(dataset==dataset)) %>%
      full_join(phylogenetic,by=join_by(dataset==dataset))

# Write alpha diversities
alpha_diversity %>% write_tsv(paste0("results/alpha_",genus,".tsv"))

# Print alpha diversity
alpha_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(dataset==dataset)) %>%
  group_by(Extraction) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_q2uqnbt2g03ys6ctkxbm
Extraction richness neutral phylogenetic
E1 80.5 33.09492 4.689994
E2 78.5 34.42602 4.745518
E3 73.0 32.71816 4.716865

Beta diversity

#Calculate Hill numbers
richness <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C", out="pair")

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C", out="pair")

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C", out="pair")

# Merge beta diversities
beta_diversity <- richness %>%
      full_join(neutral,by=c("first", "second")) %>%
      full_join(phylogenetic,by=c("first", "second")) %>%
      rename(richness=C.x, neutral=C.y, phylogenetic=C)

# Write beta diversities
beta_diversity %>% write_tsv(paste0("results/beta_",genus,".tsv"))

# Print beta diversities
beta_diversity %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(first==dataset)) %>%
  left_join(sample_metadata %>% select(dataset,Extraction,Sample),by=join_by(second==dataset)) %>%
  mutate(relationship = case_when(
    Extraction.x == Extraction.y & Sample.x != Sample.y ~ "inter",
    Extraction.x != Extraction.y & Sample.x == Sample.y ~ "intra",
    TRUE ~ NA_character_  # Handle other cases if needed
  )) %>%
  filter(relationship != is.na(relationship)) %>%
  group_by(relationship) %>%
  summarise(richness=mean(richness), neutral=mean(neutral), phylogenetic=mean(phylogenetic)) %>%
  tt()
tinytable_txb4c6svu6tsoz167o6d
relationship richness neutral phylogenetic
inter 0.2864067 0.41508917 0.122770382
intra 0.0446703 0.00905735 0.003367558

Variance partitioning

richness <-genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=0, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

neutral <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

phylogenetic <- genome_counts_filt %>%
            column_to_rownames(var="genome") %>%
            select(where(~!all(. == 0))) %>%
            hillpair(.,q=1, tree=genome_tree, metric="C") %>%
            as.dist() %>%
            adonis2(formula = . ~ Sample + Extraction, data = sample_metadata %>% arrange(match(dataset,colnames(genome_counts_filt))), permutations = 999, method = "bray", sqrt.dist = TRUE) %>%
            broom::tidy() %>%
            filter(term == "Extraction") %>%
            select(R2) %>% pull()

tibble(species=rep(species,3),metric=c("richness","neutral","phylogenetic"),r2=c(richness,neutral,phylogenetic)) %>%
      write_tsv(paste0("results/var_",genus,".tsv"))